## Courting the President replication
##
## contender analysis 
## Estimand: ATT 
## Dependent variable: presIdeoVote 
##
## Reproduce: 
##      - Table 1 (top panel) 
##      - Figure 1 (TASMD & KS statistic)
##      - Figure 2 (Q-Q Plot)
##
## Kevin Quinn and Guoer Liu
## 11/19/2024


# setwd("") # set at your current working directory
set.seed(91800198)




source("func.R")
library(haven)
library(WeightIt)
library(Matching)
library(cem)
library(data.table)
library(RColorBrewer)
library(lattice)



mydata <- read_dta("Contender.dta")
mydata <- as.data.frame(mydata)


mydata <- mydata[, c("presIdeoVote", "treatFinal0", "judgeJCS", "presDist",
                     "panelDistJCS", "circmed", "sctmed", "coarevtc",
                     "casepub", "judge")]
mydata <- na.omit(mydata)

cov.names.sub <- c("judgeJCS", "presDist", "panelDistJCS", "circmed",
                   "sctmed", "coarevtc", "casepub")

treat.form <- "treatFinal0 ~ judgeJCS + presDist + panelDistJCS + circmed + sctmed + coarevtc + casepub"


reg.general.form <- "presIdeoVote ~ judgeJCS + presDist + panelDistJCS + circmed + sctmed + coarevtc + casepub"




estimator <- NULL
estimate <- NULL
SE <- NULL
Eff.n <- NULL
Eff.n.ratio <- NULL
DFBETA.min <- NULL
DFBETA.max <- NULL
DFBETA.min.who <- NULL
DFBETA.max.who <- NULL
extrap.treat <- NULL
extrap.control <- NULL





## #############################################################
## general regression
estimator <- c(estimator, "reg.general")
mydata.1 <- mydata[mydata$treatFinal0==1,]
mydata.0 <- mydata[mydata$treatFinal0==0,]

lm.out.0 <- lm(reg.general.form, data=mydata.0)

m0 <- predict(lm.out.0, newdata=mydata.1)

estimate <- c(estimate, mean(mydata.1$presIdeoVote - m0))


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata[bs.inds,]

    mydata.bs.1 <- mydata.bs[mydata.bs$treatFinal0==1,]
    mydata.bs.0 <- mydata.bs[mydata.bs$treatFinal0==0,]

    lm.out.0 <- lm(reg.general.form, data=mydata.bs.0)
    
    m0 <- predict(lm.out.0, newdata=mydata.bs.1)

    estim.bs[b] <- mean(mydata.bs.1$presIdeoVote - m0)
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
X1 <- as.matrix(mydata[mydata$treatFinal0==1, cov.names.sub])
X1 <- cbind(1, X1)
X0 <- as.matrix(mydata[mydata$treatFinal0==0, cov.names.sub])
X0 <- cbind(1, X0)
n1 <- nrow(X1)
n0 <- nrow(X0)
n <- n1 + n0
treat.vec <- c(rep(1, n1), rep(0, n0))
y1 <- mydata[mydata$treatFinal0==1, "presIdeoVote"]
y0 <- mydata[mydata$treatFinal0==0, "presIdeoVote"]
X <- rbind(X1, X0)
y <- as.matrix(c(y1, y0), nrow=n, ncol=1)
ones.n <- matrix(1, nrow=n1, ncol=1)

w1 <- rep(1, n1)
w0 <- t(ones.n) %*% X1 %*% solve(t(X0) %*% X0) %*% t(X0)
w <- as.vector(c(w1, w0))

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treat.vec, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)

mydata.contender.reg.att <- rbind(mydata.1, mydata.0)
mydata.contender.reg.att$w <- w

DFBETA.min.who <- c(DFBETA.min.who, mydata.contender.reg.att[min.ind, "judge"])
DFBETA.max.who <- c(DFBETA.max.who, mydata.contender.reg.att[max.ind, "judge"])

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)


## END general regression (two regression models)
## #############################################################








## #############################################################
## general regression (constant effect)
estimator <- c(estimator, "reg.constant")
reg.cons.form <- "presIdeoVote ~ treatFinal0 + judgeJCS + presDist + panelDistJCS + circmed + sctmed + coarevtc + casepub"
lm.out.cons <- lm(reg.cons.form, data=mydata)

estimate <- c(estimate, summary(lm.out.cons)$coefficient[2, 1])

## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata[bs.inds,]

    lm.out.cons <- lm(reg.cons.form, data=mydata.bs)

    estim.bs[b] <- summary(lm.out.cons)$coefficient[2, 1]
}
SE <- c(SE, sd(estim.bs))


## get the WATE weights
treatment <- mydata$treatFinal0
y <- mydata$presIdeoVote
n <- length(treatment)
n1 <- sum(treatment == 1)
n0 <- sum(treatment == 0)

X <- as.matrix(mydata[, cov.names.sub])
X.good.cols <- qr(X)$pivot[seq_len(qr(X)$rank)]
ordvec <- order(treatment)
treat <- treatment[ordvec]
X.cov <- X[ordvec, X.good.cols]
X <- cbind(1, treat, X.cov)
X0 <- cbind(1, 0, X.cov)
X1 <- cbind(1, 1, X.cov)

ones <- matrix(1, nrow = n, ncol = 1)
    
w1.full <- t(ones) %*% X1 %*% solve(crossprod(X)) %*% t(X)
w0.full <- t(ones) %*% X0 %*% solve(crossprod(X)) %*% t(X)
w1 <- w1.full[(n0+1):n] - w0.full[(n0+1):n]
w0 <- w0.full[1:n0] - w1.full[1:n0]
w.ordvec <- c(w0, w1) # stacked w
w <- w.ordvec[order(ordvec)] # w in the original order

w1 <- w[treatment == 1]
w0 <- w[treatment == 0]

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(y, treatment, w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, mydata[min.ind, "judge"]) 
DFBETA.max.who <- c(DFBETA.max.who, mydata[max.ind, "judge"]) 

extrap.1 <- sum(abs(w1) * (w1 < 0)) / sum(w1 * (w1 >= 0))
extrap.0 <- sum(abs(w0) * (w0 < 0)) / sum(w0 * (w0 >= 0))

extrap.treat <- c(extrap.treat, extrap.1)
extrap.control <- c(extrap.control, extrap.0)


mydata.contender.reg_cons.att <- cbind(mydata, w)

## END general regression (constant effect)
## #############################################################










## #############################################################
## propensity score weighting (logit)
estimator <- c(estimator, "sipw.logit")

glm.out <- glm(treat.form, data=mydata, family=binomial(logit))
mydata$pscores <- glm.out$fitted
mydata$w <- 1 
mydata$w[mydata$treatFinal0==0] <-
    mydata$pscores[mydata$treatFinal0==0] /
    (1 - mydata$pscores[mydata$treatFinal0==0])

ATT.hat <- sum(mydata$w * mydata$treatFinal0 * mydata$presIdeoVote) /
    sum(mydata$w * mydata$treatFinal0) -
    sum(mydata$w * (1-mydata$treatFinal0) * mydata$presIdeoVote) /
    sum(mydata$w * (1-mydata$treatFinal0))

estimate <- c(estimate, ATT.hat)


## SE via nonparametric bootstrap
B <- 5000
n <- nrow(mydata)
estim.bs <- rep(NA, B)
for (b in 1:B){
    bs.inds <- sample(1:n, n, replace=TRUE)
    mydata.bs <- mydata[bs.inds,]

    glm.out.bs <- glm(treat.form, data=mydata.bs,
                      family=binomial(logit))
    mydata.bs$pscores <- glm.out.bs$fitted
    mydata.bs$w <- 1 
    mydata.bs$w[mydata.bs$treatFinal0==0] <-
        mydata.bs$pscores[mydata.bs$treatFinal0==0] /
        (1 - mydata.bs$pscores[mydata.bs$treatFinal0==0]) 
     
    ATT.hat.bs <- sum(mydata.bs$w * mydata.bs$treatFinal0 *
                      mydata.bs$presIdeoVote) /
        sum(mydata.bs$w * mydata.bs$treatFinal0) -
        sum(mydata.bs$w * (1-mydata.bs$treatFinal0) *
            mydata.bs$presIdeoVote) /
        sum(mydata.bs$w * (1-mydata.bs$treatFinal0))
    
    
    estim.bs[b] <- ATT.hat.bs
}
SE <- c(SE, sd(estim.bs))


w0 <- mydata$w[mydata$treatFinal0==0]
w1 <- mydata$w[mydata$treatFinal0==1]
n1 <- sum(mydata$treatFinal0)
n <- nrow(mydata)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata$presIdeoVote, mydata$treatFinal0,
                              mydata$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, mydata[min.ind, "judge"])

DFBETA.max.who <- c(DFBETA.max.who, mydata[max.ind, "judge"])


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)


mydata.contender.sipw.logit.att <- mydata

## END propensity score weighting (logit)
## #############################################################







## #############################################################
## entropy balancing
estimator <- c(estimator, "entropy.balancing")

eb.out <- weightit(as.formula(treat.form), data=mydata,
                   method="ebal",
                   estimand="ATT")
mydata$w <- eb.out$weights

wls.out <- lm(presIdeoVote ~ treatFinal0, weights=w, data=mydata)

ATT.hat <- coef(wls.out)[2]

estimate <- c(estimate, ATT.hat)

SE <- c(SE, sqrt(vcov(wls.out)[2,2]))

w0 <- mydata$w[mydata$treatFinal0==0]
w1 <- mydata$w[mydata$treatFinal0==1]
n1 <- sum(mydata$treatFinal0)
n <- nrow(mydata)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata$presIdeoVote, mydata$treatFinal0,
                              mydata$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)


DFBETA.min.who <- c(DFBETA.min.who, mydata[min.ind, "judge"])
DFBETA.max.who <- c(DFBETA.max.who, mydata[max.ind, "judge"])


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)


mydata.contender.entropy.balancing.att <- mydata

## END entropy balancing
## #############################################################










## #############################################################
## 1:1 NN matching

set.seed(91800198)

estimator <- c(estimator, "NN.matching")

treat.vec <- mydata$treatFinal0
X <- as.matrix(mydata[, c(cov.names.sub)])
y <- mydata$presIdeoVote

# Matching by linearized propensity score

glm.out <- glm(treat.vec ~ X, family=binomial(logit))
l.pscores <- predict(glm.out)


match.out <- Match(Y = y, Tr=treat.vec, X=l.pscores, estimand="ATT",
                   M=1, ties=FALSE, replace=TRUE, BiasAdjust=FALSE)

estimate <- c(estimate, match.out$est)
SE <- c(SE, match.out$se.standard)

mydata$w <- 0
for (i in 1:nrow(mydata)){
     mydata$w[i] <- sum(match.out$index.control == i) +
            sum(match.out$index.treated == i)
}

w0 <- mydata$w[mydata$treatFinal0==0]
w1 <- mydata$w[mydata$treatFinal0==1]
n1 <- sum(mydata$treatFinal0)
n <- nrow(mydata)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata$presIdeoVote, mydata$treatFinal0,
                              mydata$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, mydata[min.ind, "judge"])

DFBETA.max.who <- c(DFBETA.max.who, mydata[max.ind, "judge"])


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.contender.NN.matching.att <- mydata

## END 1:1 NN matching
## #############################################################



## #############################################################
## coarsened exact matching
estimator <- c(estimator, "CEM")

cem.out <- cem(treatment="treatFinal0", data=mydata,
               drop=c("presIdeoVote", "treatFinal0", "judge",
                      "pscores", "w"))
att.out <- att(cem.out, formula = presIdeoVote ~ treatFinal0,
               data=mydata)
s <- summary(att.out)


estimate <- c(estimate, s[2,1])
SE <- c(SE, s[2,2])

mydata$w <- cem.out$w

w0 <- mydata$w[mydata$treatFinal0==0]
w1 <- mydata$w[mydata$treatFinal0==1]
n1 <- sum(mydata$treatFinal0)
n <- nrow(mydata)
n0 <- n - n1

n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)

DFBETA.out <- DFBETA.weighted(mydata$presIdeoVote, mydata$treatFinal0,
                              mydata$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))

min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, mydata[min.ind, "judge"])

DFBETA.max.who <- c(DFBETA.max.who, mydata[max.ind, "judge"])


extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)

mydata.contender.CEM.att <- mydata

## END coarsened exact matching
## #############################################################


## #############################################################
## TABLE

## Table 1 (top panel) ###

## contender
contender.att <- data.frame(estimator=estimator,
                            estimate=estimate,
                            SE=SE,
                            #Z = estimate / SE,
                            Eff.n=Eff.n,
                            Eff.n.ratio=Eff.n.ratio,
                            DFBETA.min=DFBETA.min,
                            DFBETA.min.who=DFBETA.min.who,
                            DFBETA.max=DFBETA.max,
                            DFBETA.max.who=DFBETA.max.who,
                            extrap.control=extrap.control,
                            extrap.treat=extrap.treat)

print(contender.att)
#library(xtable)
#xtable(contender.att, digits=c(0, 0, rep(3, 5), 0, 3, 0, rep(3, 2)))


## #############################################################
## FIGURES 


### Figure 1: TASMD (left panel) ###

mydata.raw <- mydata
mydata.raw$w <- 1

data.list <- list(mydata.raw,
                  mydata.contender.reg.att,
                  mydata.contender.reg_cons.att,
                  mydata.contender.sipw.logit.att,
                  mydata.contender.entropy.balancing.att,
                  mydata.contender.NN.matching.att,
                  mydata.contender.CEM.att)

TASMD.contender.data <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                             ncol = length(data.list)))

colnames.var <- c("Raw Data",
                  "Regression (MRI)",
                  "Regression (URI)",
                  "PScores",
                  "Ebal",
                  "NNmatch", 
                  "CEM")

colnames(TASMD.contender.data) <- colnames.var
rownames(TASMD.contender.data) <- cov.names.sub

for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        tasmd <- TASMD(x=mydata[,xvar], w=mydata$w,
                       treatment=mydata$treatFinal0)
        TASMD.contender.data[xvar, i] <- tasmd
    }
}

xvar.names <- c("JCS score", "Ideo. Distance", "Panel JCS",
                "Circuit med", "SC med", "Court reversal", "Case pub.")

TASMD.contender.data$xvar <- xvar.names


TASMD.contender.long <- melt(setDT(TASMD.contender.data),
                            id.vars="xvar", variable.name = "method")


colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

#pdf("TASMD-contenders.pdf", height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), cex = 1,
                         col = colors                                    
                         ))
print(dotplot(xvar ~ value, data = TASMD.contender.long,
              groups = method,
              xlab="TASMD",
              scales=list(y=list(cex=1.2), x=list(cex=1.2)),
              panel=function(x, y, ...){
                  panel.abline(h=1:42,col="grey95", ...)
                  panel.abline(v=0.1, col="grey85")
                  panel.xyplot(x, y, ...)
              },
              key=simpleKey(levels(TASMD.contender.long$method),
                            space="right")
              ))
trellis.par.set(trell.par)  ## go back to default
#dev.off()



## Figure 1: KS statistics (right panel) ###


KS.contender.data <- as.data.frame(matrix(nrow = length(cov.names.sub), 
                                             ncol = length(data.list)))
colnames(KS.contender.data) <- colnames.var
rownames(KS.contender.data) <- cov.names.sub

for(i in 1:length(data.list)){
    mydata <- data.list[[i]]
    for (xvar in cov.names.sub){
        KS <- wtd.ks(x=mydata[,xvar], w=mydata$w,
                     treatment=mydata$treatFinal0)
        KS.contender.data[xvar, i] <- KS
    }
}

KS.contender.data$xvar <- xvar.names


KS.contender.long <- melt(setDT(KS.contender.data),
                            id.vars="xvar", variable.name = "method")


colors <- brewer.pal(7, "Dark2")
colors <- replace(colors, c(1, 2), colors[c(2, 1)])

#pdf("KS-contenders.pdf", height=7, width=6, family = "Times")
trell.par <- trellis.par.get() ## get default settings
trellis.par.set(superpose.symbol =
                    list(pch = c(16, 0:3, 18, 4), cex = 1,
                         col = colors                                    
                         ))
print(dotplot(xvar ~ value, data = KS.contender.long,
              groups = method,
              xlab="KS Statistics",
              scales=list(y=list(cex=1.2), x=list(cex=1.2)),
              panel=function(x, y, ...){
                  panel.abline(h=1:42,col="grey95", ...)
                  panel.abline(v=0, col="grey85")
                  panel.xyplot(x, y, ...)
              },
              key=simpleKey(levels(KS.contender.long$method),
                            space="right")
              ))
trellis.par.set(trell.par)  ## go back to default
#dev.off()



## Figure 2: Q-Q plot for Ideo. Distance (raw vs. entropy-bal weighted) ###

# Save the current plotting parameters
old_par <- par(no.readonly = TRUE)

# Open a PDF device
# pdf("wtdQQ-presIdeoDist-contenders.pdf", height=7, width=6, family = "Times")

# Adjust the plotting area to accommodate the legend
par(mfrow=c(1, 1), mar=c(8, 4, 4, 2) + 0.1)
# First layer
wtd.qqplot(x = data.list[[1]][, "presDist"], weights = data.list[[1]]$w,
           treatment = data.list[[1]]$treatFinal0,
           points.cex = 1.2, points.pch = 16,
           points.col = "#BF2C23", points.alpha = 0.08,
           x0lab = "Control",
           x1lab = "Treated")
# Second plot overlayed
wtd.qqplot(x = data.list[[5]][, "presDist"], weights = data.list[[5]]$w,
           treatment = data.list[[5]]$treatFinal0, add = TRUE,
           points.cex = 1.2, points.pch = 4, points.lwd = 1.5,
           points.col = "#2F67B1", points.alpha = 1,
           x0lab = NULL,
           x1lab = NULL)
# Add the legend outside the plot area at the bottom
par(xpd=TRUE)
legend("bottom", legend = c("Raw Data", "Ebal Weighted"),
       col = c(alpha("#BF2C23", 0.6), alpha("#2F67B1", 1)),
       pch = c(16, 4), pt.cex = 2, horiz = TRUE, inset = -0.3, xpd = NA)
# dev.off()

# Reset plotting parameters
par(old_par)

